# Filter a whitespace-separated list of IP addresses read from stdin.
# Outputs: one address per line on stdout, omitting the loopback address
#          127.0.0.1 and every address whose first octet is 172.
# The 172 pattern is anchored and its dot escaped on purpose: the unanchored
# regex '172*' means "17 followed by zero or more 2s" and would also discard
# unrelated addresses such as 10.17.0.5 or 192.168.17.2.
filter_host_ips() {
  tr ' ' '\n' | grep -v -e '^127\.0\.0\.1$' -e '^172\.'
}

# Addresses reported by `hostname -I`, one per line, after filtering.
# NOTE(review): IP_ADDRESSES is not referenced again in this section of the
# file — confirm whether a later consumer exists.
IP_ADDRESSES=$(hostname -I | filter_host_ips)

# Put /usr/local/Ascend first on Python's module search path.
# The ${VAR:+...} form appends the previous value (with its ':' separator)
# only when PYTHONPATH is already non-empty; otherwise a trailing ':' would
# leave an empty entry in the search path.
export PYTHONPATH="/usr/local/Ascend${PYTHONPATH:+:${PYTHONPATH}}"
export VLLM_LLMDATADIST_ZMQ_PORT=5569
# Eight device indices (8-15); the launch command in this file uses --tp 8.
export ASCEND_RT_VISIBLE_DEVICES=8,9,10,11,12,13,14,15
# Directory that contains the merged global ranktable file (shell-local,
# not exported).
RANK_TABLE_PATH=/data/lumin/omni_infer/tools/scripts/global_path

# Adjust the ranktable files and the server IP lists below to match the
# actual machine IPs and number of cards.
GLOBAL_RANK_TABLE_FILE_PATH="${RANK_TABLE_PATH}/global_ranktable_merge.json"
RANK_TABLE_FILE_PATH=/data/lumin/omni_infer/tools/scripts/decode-ranktable/local_ranktable_7.150.13.139_89101112131415.json
LOCAL_DECODE_SERVER_IP_LIST="7.150.13.139"
GLOBAL_DECODE_SERVER_IP_LIST="7.150.13.139"

export GLOBAL_RANK_TABLE_FILE_PATH RANK_TABLE_FILE_PATH
export LOCAL_DECODE_SERVER_IP_LIST GLOBAL_DECODE_SERVER_IP_LIST

# This script configures the decode role.
ROLE=decode
export ROLE
# export MODEL_EXTRA_CFG_PATH="/data/lumin/omni_infer/tests/st/deepseek_v3/test_decode.json"

# NOTE(review): presumably selects RoCE rather than PCIe for HCCL traffic
# inside a node — confirm against the HCCL documentation.
export HCCL_INTRA_ROCE_ENABLE=1 HCCL_INTRA_PCIE_ENABLE=0

# Graph-mode environment variables; enabled for decode.
export HCCL_BUFFSIZE=2000 HCCL_OP_EXPANSION_MODE=AIV VLLM_ENABLE_MC2=1

# Graph dump settings:
#   DUMP_GE_GRAPH    - controls how much content each dumped graph contains
#   DUMP_GRAPH_LEVEL - controls how many graphs are dumped
export DUMP_GE_GRAPH=2 DUMP_GRAPH_LEVEL=3

# For debugging only:
# export ASCEND_LAUNCH_BLOCKING=1

# Decode settings; DECODE_DP_SIZE is passed as --num-dp by the launch command
# in this file.
export DECODE_DP_SIZE=1 MOE_DISPATCH_COMBINE=1

# Used when tuning numerical precision.
export HCCL_DETERMINISTIC=true CLOSE_MATMUL_K_SHIFT=1

# Configuration for a 1P1D deployment (one prefill pod, one decode pod).
export PREFILL_POD_NUM=1 DECODE_POD_NUM=1

# export FORWARD_TIME=30
export OMNI_USE_QWEN=1

export VLLM_LOGGING_LEVEL=INFO

# Both socket-interface variables are set to the same network interface.
export GLOO_SOCKET_IFNAME=enp23s0f3 TP_SOCKET_IFNAME=enp23s0f3
# export ASCEND_RT_VISIBLE_DEVICES=6
export VLLM_USE_V1=1 VLLM_WORKER_MULTIPROC_METHOD=fork
# export VLLM_ENABLE_MC2=0
# export USING_LCCL_COM=0

# Launch start_api_servers.py in the background (trailing '&'); nothing in
# this section waits for it.
#   --server-offset  taken from SERVER_OFFSET, defaulting to 0 when that
#                    variable is unset or empty
#   --num-dp         taken from DECODE_DP_SIZE, exported earlier in this file
# Both expansions are double-quoted so that each one always reaches the
# program as exactly one argument (no word splitting or glob expansion).
# The --kv-transfer-config JSON declares kv_role "kv_consumer" with kv_rank 1
# and kv_parallel_size 2.
python  /data/lumin/omni_infer/tools/scripts/start_api_servers.py \
    --num-servers 1 \
    --server-offset "${SERVER_OFFSET:-0}" \
    --num-dp "${DECODE_DP_SIZE}" \
    --model-path /data/model/QwQ-32B \
    --master-ip 7.150.13.139 \
    --master-port 8503 \
    --base-api-port 5550 \
    --tp 8 \
    --gpu-util 0.9 \
    --served-model-name qwen \
    --max-model-len 40960 \
    --log-dir ./apiserverlog/decode/ \
    --no-enable-prefix-caching \
    --no-enable-chunked-prefill \
    --extra-args "--max-num-batched-tokens 40960 " \
    --additional-config '{"enable_graph_mode": true, "decode_gear_list": [ 64 ]}' \
    --kv-transfer-config \
    '{
        "kv_connector": "AscendHcclConnectorV1",
        "kv_buffer_device": "npu",
        "kv_role": "kv_consumer",
        "kv_rank": 1,
        "engine_id": 0,
        "kv_parallel_size": 2
    }' &

# Alternative --extra-args value, kept for reference:
# --extra-args "--max-num-batched-tokens 65536 " \